In [1]:
# Computations
import numpy as np
import pandas as pd

# scipy
from scipy.stats import norm

# preprocessing
from sklearn import preprocessing
import re

# Visualisation libraries

## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex

## seaborn
import seaborn as sns
sns.set_context("paper", rc={"font.size":12,"axes.titlesize":14,"axes.labelsize":12})
sns.set_style("white")

## matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon
import matplotlib.gridspec as gridspec
import matplotlib.colors

# matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and later releases dropped
# the old names, so use whichever spelling this installation actually provides.
Whitegrid_Style = ('seaborn-v0_8-whitegrid' if 'seaborn-v0_8-whitegrid' in plt.style.available
                   else 'seaborn-whitegrid')
plt.style.use(Whitegrid_Style)
# Global font sizes / text colour for every matplotlib figure in the notebook.
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
%matplotlib inline

## plotly
from plotly.offline import init_notebook_mode, iplot 
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
# Graphics in retina format 
%config InlineBackend.figure_format = 'retina' 

## Progress Bar
import progressbar

## missingno
import missingno as msno

import warnings
warnings.filterwarnings("ignore")

IBM HR Analytics Employee Attrition and Performance Dataset

In this study, we analyze HR data available from kaggle.com

This data is fictional; it was created by IBM data scientists.

Categorical Parameters:

| Parameter | 1 | 2 | 3 | 4 | 5 |
|---|---|---|---|---|---|
| Education | Below College | College | Bachelor | Master | Doctor |
| Environment Satisfaction | Low | Medium | High | Very High | |
| Job Involvement | Low | Medium | High | Very High | |
| Job Satisfaction | Low | Medium | High | Very High | |
| Performance Rating | Low | Good | Excellent | Outstanding | |
| Relationship Satisfaction | Low | Medium | High | Very High | |
| Work Life Balance | Bad | Good | Better | Best | |

This can be encoded as follows,

In [2]:
# Lookup tables translating the ordinal integer codes (starting at 1) of each survey-style column
# into their human-readable labels. Keys are the prettified column names.
Categorical_Dict = {
    column: dict(enumerate(levels, start=1))
    for column, levels in [
        ('Education', ['Below College', 'College', 'Bachelor', 'Master', 'Doctor']),
        ('Environment Satisfaction', ['Low', 'Medium', 'High', 'Very High']),
        ('Job Involvement', ['Low', 'Medium', 'High', 'Very High']),
        ('Job Satisfaction', ['Low', 'Medium', 'High', 'Very High']),
        ('Performance Rating', ['Low', 'Good', 'Excellent', 'Outstanding']),
        ('Relationship Satisfaction', ['Low', 'Medium', 'High', 'Very High']),
        ('Work Life Balance', ['Bad', 'Good', 'Better', 'Best']),
    ]
}

Loading the Dataset

In [3]:
# Read the raw attrition workbook (path is relative to the notebook's working directory).
Data = pd.read_excel('Data/WA_Fn-UseC_-HR-Employee-Attrition.xlsx')
# Prettify the CamelCase headers: insert a space before an inner capital letter
# ("MonthlyIncome" -> "Monthly Income") ...
Temp = [re.sub(r"(\w)([A-Z])", r"\1 \2", x) for x in Data.columns]
# ... then expand the abbreviations ("Curr" -> "Current", "Num" -> "Number Of") and detach the "18".
Temp = [x.replace(' Curr ', ' Current ').replace('18',' 18').replace('Num ','Number Of ') for x in Temp]
Data.columns = Temp
del Temp
# Display names for the two attrition classes (typo 'Remanined' fixed).
Labels = ['Remained', 'Churned']
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0; Styler.hide(axis='index')
# is its replacement. Fall back to the old method only where the new one does not exist.
Head_Style = Data.head().style
Head_Style.hide(axis='index') if hasattr(Head_Style, 'hide') else Head_Style.hide_index()
Out[3]:
Age Attrition Business Travel Daily Rate Department Distance From Home Education Education Field Employee Count Employee Number Environment Satisfaction Gender Hourly Rate Job Involvement Job Level Job Role Job Satisfaction Marital Status Monthly Income Monthly Rate Number Of Companies Worked Over 18 Over Time Percent Salary Hike Performance Rating Relationship Satisfaction Standard Hours Stock Option Level Total Working Years Training Times Last Year Work Life Balance Years At Company Years In Current Role Years Since Last Promotion Years With Current Manager
41 Yes Travel_Rarely 1102 Sales 1 2 Life Sciences 1 1 2 Female 94 3 2 Sales Executive 4 Single 5993 19479 8 Y Yes 11 3 1 80 0 8 0 1 6 4 0 5
49 No Travel_Frequently 279 Research & Development 8 1 Life Sciences 1 2 3 Male 61 2 2 Research Scientist 2 Married 5130 24907 1 Y No 23 4 4 80 1 10 3 3 10 7 1 7
37 Yes Travel_Rarely 1373 Research & Development 2 2 Other 1 4 4 Male 92 2 1 Laboratory Technician 3 Single 2090 2396 6 Y Yes 15 3 2 80 0 7 3 3 0 0 0 0
33 No Travel_Frequently 1392 Research & Development 3 4 Life Sciences 1 5 4 Female 56 3 1 Research Scientist 3 Married 2909 23159 1 Y Yes 11 3 3 80 0 8 3 3 8 7 3 0
27 No Travel_Rarely 591 Research & Development 2 1 Medical 1 7 1 Male 40 3 1 Laboratory Technician 2 Married 3468 16632 9 Y No 12 3 4 80 1 6 3 3 2 2 2 2

First off, let's take a look at the dataset

In [4]:
def Data_info(Inp, Only_NaN = False):
    """Summarise dtype and missing-value statistics for every column of a DataFrame.

    Parameters
    ----------
    Inp : pandas.DataFrame
        Frame to inspect.
    Only_NaN : bool, default False
        When True, keep only the columns that contain at least one missing value.

    Returns
    -------
    pandas.DataFrame
        Indexed by the column names of ``Inp`` with the columns 'Data Type',
        'Number of NaN Values' and 'Percentage' (missing values as a percentage
        of the row count, rounded to two decimals).
    """
    dtype_frame = Inp.dtypes.to_frame(name='Data Type').sort_values(by=['Data Type'])
    nan_frame = Inp.isnull().sum().to_frame(name='Number of NaN Values')
    Out = dtype_frame.join(nan_frame, how='outer')
    missing_share = Out['Number of NaN Values']/Inp.shape[0]
    Out['Percentage'] = np.round(100*missing_share, 2)
    if not Only_NaN:
        return Out
    return Out.loc[Out['Number of NaN Values']>0]
# Transpose the summary so columns run left-to-right and show only its first two rows
# ('Data Type' and 'Number of NaN Values'); the 'Percentage' row is left out.
display(Data_info(Data).T[:2])
# missingno bar chart over all columns of Data, as a visual check for missing values.
_ = msno.bar(Data, figsize=(16,3), fontsize=14, log=False, color="#34495e")
Age Attrition Business Travel Daily Rate Department Distance From Home Education Education Field Employee Count Employee Number ... Relationship Satisfaction Standard Hours Stock Option Level Total Working Years Training Times Last Year Work Life Balance Years At Company Years In Current Role Years Since Last Promotion Years With Current Manager
Data Type int64 object object int64 object int64 int64 object int64 int64 ... int64 int64 int64 int64 int64 int64 int64 int64 int64 int64
Number of NaN Values 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

2 rows × 35 columns

Furthermore, the data type of each column:

In [5]:
def dtypes_group(Inp):
    """Group the column names of a DataFrame by their dtype.

    Parameters
    ----------
    Inp : pandas.DataFrame
        Frame whose columns are grouped.

    Returns
    -------
    pandas.DataFrame
        One row per distinct dtype (the dtype is the index) and a single column
        'Columns' holding the list of column names that have that dtype.
    """
    Temp = Inp.dtypes.to_frame(name='Data Type').sort_values(by=['Data Type'])
    kinds = Temp['Data Type'].unique()
    # Build every cell up front instead of assigning nested lists through a boolean .loc mask,
    # which depends on how pandas broadcasts a list-of-lists into a single cell.
    columns_by_kind = [Temp.loc[Temp['Data Type'] == kind].index.tolist() for kind in kinds]
    # dtype=object keeps each list as one cell value.
    return pd.Series(columns_by_kind, index=kinds, dtype=object).to_frame(name='Columns')
# Column names of Data grouped by dtype (one row per dtype), kept in `dtypes` and displayed.
dtypes = dtypes_group(Data)
display(dtypes)
Columns
int64 [Age, Years In Current Role, Years At Company,...
object [Over 18, Job Role, Education Field, Gender, D...
In [6]:
# Overview: one 30-bin histogram per column that DataFrame.hist can plot; `_ =` discards the axes array.
_ = Data.hist(bins=30, grid=False, figsize=(18,18), color='#34495e', edgecolor='k', zorder=2, rwidth=0.8)

Exploratory Data Analysis

In [7]:
# Shared colour settings for the EDA figures below.
# Each *_Colors list is a discrete colour sequence; each *_LC is the matching outline (line) colour.

# Attrition colours
Att_Colors = ['LightSalmon', 'LightBlue']
Att_LC = 'Black'
# Gender colours
MF_Colors = ['HotPink', 'RoyalBlue']
MF_LC = 'Navy'
# Education colours (five entries, one per education level)
Ed_Colors = ['LightCoral','Khaki','GreenYellow','LimeGreen','ForestGreen']
Ed_LC = 'Black'

Age

In [8]:
# Histogram of Age coloured by attrition status, with a marginal box plot per status.
fig = px.histogram(Data, x = 'Age', color='Attrition', marginal= 'box', color_discrete_sequence= Att_Colors,
                  hover_data=Data.columns)
fig.update_traces(marker_line_color= Att_LC, marker_line_width=0.5, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Fix the range of the main (histogram) y-axis only.
fig['layout']['yaxis'].update(range=[0, 80])
# The title is set once here; the earlier, unrelated 'Credit Distribution Histogram' title
# (a copy-paste leftover that was immediately overwritten) has been removed.
fig.update_layout(plot_bgcolor= 'white',
                  title={'text': 'Age Distribution by Attrition',
                         'x':0.46, 'y':0.95,
                         'xanchor': 'center', 'yanchor': 'top'},
                  yaxis_title='Frequency')
fig.show()
In [9]:
# Percentage of all employees per gender, age band and attrition status.
bins = pd.IntervalIndex.from_tuples([(0, 25), (25, 40), (40, 45),(45, 60)])
# .assign() returns a new frame, so the binning never writes into a slice of Data.
Temp = Data[['Gender','Age','Attrition']].assign(Age = lambda df: pd.cut(df['Age'], bins))
# observed=False keeps empty (gender, band, attrition) combinations as explicit zero rows,
# whatever the default of the installed pandas version is.
Temp = Temp.groupby(['Gender','Age','Attrition'], observed=False)['Attrition'].count().to_frame('Count')
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
# Gender is a secondary sort key: px.bar hands out MF_Colors in order of first appearance,
# so a deterministic row order keeps the colour of each gender fixed.
Temp = Temp.reset_index().sort_values(by=['Age', 'Gender'])
Temp['Age'] = Temp['Age'].astype(str)

# Figures: one panel per attrition status, sharing the x-axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
for row, status in enumerate(['No', 'Yes'], start=1):
    bars = px.bar(Temp.loc[Temp.Attrition == status], y= 'Age', x= 'Percentage', orientation='h',
                  color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                  color_discrete_sequence = MF_Colors)
    # Copy every gender trace that exists instead of indexing traces [0] and [1] by hand.
    for trace in bars.data:
        fig.add_trace(trace, row=row, col=1)
# Legend entries come from the top panel only; the bottom panel would just repeat them.
fig.update_traces(showlegend = False, row=2, col=1)
# '.2f' prints two decimals; the bare '.2' means two significant digits in d3-format (12.34 -> "12").
fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                  texttemplate='%{text:.2f}%', textposition='outside')

# Layout
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(title_text='Age', showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_xaxes(title_text='Percent', range=[0, 25], row=2, col=1)
fig.update_layout(height= 600, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Age Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
In [10]:
# Percentage of all employees per education level, age band and attrition status.
bins = pd.IntervalIndex.from_tuples([(0, 25), (25, 40), (40, 45),(45, 60)])
# .assign() returns a new frame, so the binning never writes into a slice of Data.
Temp = Data[['Education','Age','Attrition']].assign(Age = lambda df: pd.cut(df['Age'], bins))
# observed=False makes the handling of empty combinations independent of the pandas default.
Temp = Temp.groupby(['Education','Age','Attrition'], observed=False)['Attrition'].count().to_frame('Count')
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index()
# Keep the numeric code for sorting; the 'Education' column itself becomes the text label.
Temp['Education Code'] = Temp['Education']
Temp['Education'] = Temp['Education'].replace(Categorical_Dict['Education'])
# Drop empty combinations; .copy() makes the result an independent frame before it is edited.
Temp = Temp[Temp.Percentage != 0].sort_values(by=['Education Code','Age']).copy()
Temp['Age'] = Temp['Age'].astype(str)

# sns.set_palette() returns None, so it cannot serve as the `palette` argument. It is kept as a
# plain call because it also changes the session-wide default palette.
sns.set_palette(Ed_Colors)
# Fixed level order and an explicit label -> colour mapping, so both panels colour a given
# education level identically and the single legend (right panel) is valid for both.
Ed_Levels = list(Categorical_Dict['Education'].values())
Ed_Palette = dict(zip(Ed_Levels, Ed_Colors))
Age_Order = [str(b) for b in bins]

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(13.5, 6),sharey = True)
for a, status in zip(ax, ['No', 'Yes']):
    _ = sns.barplot(ax= a, x='Age', y='Percentage', hue='Education', data=Temp[Temp.Attrition == status],
                    order=Age_Order, hue_order=Ed_Levels, palette=Ed_Palette, edgecolor= Ed_LC)
    _ = a.set_title('Attrition: ' + status, fontsize = 16)
    _ = a.tick_params(labelsize=14)
_ = ax[0].set_ylim([0,20])
_ = ax[0].legend().set_visible(False)
_ = ax[1].legend(bbox_to_anchor=(1, 0.55), loc='center left',fontsize = 14)
_ = ax[1].set_ylabel(None)
plt.subplots_adjust(wspace=0.05)

Business Travel

In [11]:
# Percentage of all employees per gender, business-travel category and attrition status.
Temp = Data.groupby(['Gender','Business Travel','Attrition'])['Attrition'].count().to_frame('Count')
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
# Gender is a secondary sort key: px.bar hands out MF_Colors in order of first appearance,
# so a deterministic row order keeps the colour of each gender fixed.
Temp = Temp.reset_index().sort_values(by=['Business Travel', 'Gender'])

# Figures: one panel per attrition status, sharing the x-axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
for row, status in enumerate(['No', 'Yes'], start=1):
    bars = px.bar(Temp.loc[Temp.Attrition == status], y= 'Business Travel', x= 'Percentage', orientation='h',
                  color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                  color_discrete_sequence = MF_Colors)
    # Copy every gender trace that exists instead of indexing traces [0] and [1] by hand.
    for trace in bars.data:
        fig.add_trace(trace, row=row, col=1)
# Legend entries come from the top panel only; the bottom panel would just repeat them.
fig.update_traces(showlegend = False, row=2, col=1)
# '.2f' prints two decimals; the bare '.2' means two significant digits in d3-format (12.34 -> "12").
fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                  texttemplate='%{text:.2f}%', textposition='outside')

# Layout
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(title_text='Business Travel', showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_xaxes(title_text='Percent', range=[0, 40], row=2, col=1)
fig.update_layout(height= 500, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Business Travel Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Department

In [12]:
# Percentage of all employees per gender, department and attrition status.
Temp = Data.groupby(['Gender','Department','Attrition'])['Attrition'].count().to_frame('Count')
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
# Gender is a secondary sort key: px.bar hands out MF_Colors in order of first appearance,
# so a deterministic row order keeps the colour of each gender fixed.
Temp = Temp.reset_index().sort_values(by=['Department', 'Gender'])

# Figures: one panel per attrition status, sharing the x-axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
for row, status in enumerate(['No', 'Yes'], start=1):
    bars = px.bar(Temp.loc[Temp.Attrition == status], y= 'Department', x= 'Percentage', orientation='h',
                  color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                  color_discrete_sequence = MF_Colors)
    # Copy every gender trace that exists instead of indexing traces [0] and [1] by hand.
    for trace in bars.data:
        fig.add_trace(trace, row=row, col=1)
# Legend entries come from the top panel only; the bottom panel would just repeat them.
fig.update_traces(showlegend = False, row=2, col=1)
# '.2f' prints two decimals; the bare '.2' means two significant digits in d3-format (12.34 -> "12").
fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                  texttemplate='%{text:.2f}%', textposition='outside')

# Layout
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(title_text='Department', showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_xaxes(title_text='Percent', range=[0, 35], row=2, col=1)
fig.update_layout(height= 500, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Department Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
In [13]:
# Percentage of all employees per education level, department and attrition status.
Temp = Data.groupby(['Education','Department','Attrition'])['Attrition'].count().to_frame('Count')
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index()
# Keep the numeric code for sorting; the 'Education' column itself becomes the text label.
Temp['Education Code'] = Temp['Education']
Temp['Education'] = Temp['Education'].replace(Categorical_Dict['Education'])
# Drop empty combinations; .copy() makes the result an independent frame before it is edited.
Temp = Temp[Temp.Percentage != 0].sort_values(by=['Education Code','Department']).copy()
# Put each word of the department name on its own line so the tick labels do not overlap.
Temp['Department'] = Temp['Department'].map(lambda x: x.replace('&','and').replace(' ','\n'))

# sns.set_palette() returns None, so it cannot serve as the `palette` argument. It is kept as a
# plain call because it also changes the session-wide default palette.
sns.set_palette(Ed_Colors)
# Fixed level order and an explicit label -> colour mapping, so both panels colour a given
# education level identically and the single legend (right panel) is valid for both.
Ed_Levels = list(Categorical_Dict['Education'].values())
Ed_Palette = dict(zip(Ed_Levels, Ed_Colors))
Dept_Order = sorted(Temp['Department'].unique())

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(13.5, 6),sharey = True)
for a, status in zip(ax, ['No', 'Yes']):
    _ = sns.barplot(ax= a, x='Department', y='Percentage', hue='Education', data=Temp[Temp.Attrition == status],
                    order=Dept_Order, hue_order=Ed_Levels, palette=Ed_Palette, edgecolor= Ed_LC)
    _ = a.set_title('Attrition: ' + status, fontsize = 16)
    _ = a.tick_params(labelsize=14)
_ = ax[0].set_ylim([0,25])
_ = ax[0].legend().set_visible(False)
_ = ax[1].legend(bbox_to_anchor=(1, 0.55), loc='center left',fontsize = 14)
_ = ax[1].set_ylabel(None)
plt.subplots_adjust(wspace=0.05)

Distance from Home

In [14]:
# Histogram of Distance From Home coloured by attrition status, with a marginal box plot per status.
fig = px.histogram(Data, x = 'Distance From Home', color='Attrition', marginal= 'box', color_discrete_sequence= Att_Colors,
                  hover_data=Data.columns)
fig.update_traces(marker_line_color= Att_LC, marker_line_width=0.5, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Fix the range of the main (histogram) y-axis only.
fig['layout']['yaxis'].update(range=[0, 250])
# The title is set once here; the earlier, unrelated 'Credit Distribution Histogram' title
# (a copy-paste leftover that was immediately overwritten) has been removed.
fig.update_layout(plot_bgcolor= 'white',
                  title={'text': 'Distance From Home Distribution by Attrition',
                         'x':0.46, 'y':0.95,
                         'xanchor': 'center', 'yanchor': 'top'},
                  yaxis_title='Frequency')
fig.show()
In [15]:
# Percentage of all employees per gender, distance band and attrition status.
bins = pd.IntervalIndex.from_tuples([(0, 5), (5, 10), (10, 20),(20, 30)])
# .assign() returns a new frame, so the binning never writes into a slice of Data.
Temp = Data[['Gender','Distance From Home','Attrition']].assign(
    **{'Distance From Home': lambda df: pd.cut(df['Distance From Home'], bins)})
# observed=False keeps empty (gender, band, attrition) combinations as explicit zero rows,
# whatever the default of the installed pandas version is.
Temp = Temp.groupby(['Gender','Distance From Home','Attrition'], observed=False)['Attrition'].count().to_frame('Count')
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
# Gender is a secondary sort key: px.bar hands out MF_Colors in order of first appearance,
# so a deterministic row order keeps the colour of each gender fixed.
Temp = Temp.reset_index().sort_values(by=['Distance From Home', 'Gender'])
Temp['Distance From Home'] = Temp['Distance From Home'].astype(str)

# Figures: one panel per attrition status, sharing the x-axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
for row, status in enumerate(['No', 'Yes'], start=1):
    bars = px.bar(Temp.loc[Temp.Attrition == status], y= 'Distance From Home', x= 'Percentage', orientation='h',
                  color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                  color_discrete_sequence = MF_Colors)
    # Copy every gender trace that exists instead of indexing traces [0] and [1] by hand.
    for trace in bars.data:
        fig.add_trace(trace, row=row, col=1)
# Legend entries come from the top panel only; the bottom panel would just repeat them.
fig.update_traces(showlegend = False, row=2, col=1)
# '.2f' prints two decimals; the bare '.2' means two significant digits in d3-format (12.34 -> "12").
fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                  texttemplate='%{text:.2f}%', textposition='outside')

# Layout
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(title_text='Distance From Home', showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_xaxes(title_text='Percent', range=[0, 25], row=2, col=1)
fig.update_layout(height= 550, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Distance From Home Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Education

In [16]:
# Percentage of all employees per gender, education level and attrition status.
Temp = Data.groupby(['Gender','Education','Attrition'])['Attrition'].count().to_frame('Count')
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
# Sort on the numeric education code (not the label) so the levels keep their ordinal order;
# Gender as a secondary key fixes the order in which px.bar hands out MF_Colors.
Temp = Temp.reset_index().sort_values(by=['Education', 'Gender'])
Temp['Education'] = Temp['Education'].replace(Categorical_Dict['Education'])
Ed_Levels = list(Categorical_Dict['Education'].values())

# Figures: one panel per attrition status, sharing the x-axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
for row, status in enumerate(['No', 'Yes'], start=1):
    bars = px.bar(Temp.loc[Temp.Attrition == status], y= 'Education', x= 'Percentage', orientation='h',
                  color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                  color_discrete_sequence = MF_Colors)
    # Copy every gender trace that exists instead of indexing traces [0] and [1] by hand.
    for trace in bars.data:
        fig.add_trace(trace, row=row, col=1)
# Legend entries come from the top panel only; the bottom panel would just repeat them.
fig.update_traces(showlegend = False, row=2, col=1)
# '.2f' prints two decimals; the bare '.2' means two significant digits in d3-format (12.34 -> "12").
fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                  texttemplate='%{text:.2f}%', textposition='outside')

# Layout
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Pin the category order on both panels so a level missing from one gender cannot reshuffle the axis.
fig.update_yaxes(title_text='Education', showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 categoryorder='array', categoryarray=Ed_Levels)
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
fig.update_layout(height= 700, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Education Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Education Field

In [17]:
# Percentage of all employees per gender, education field and attrition status.
Temp = Data.groupby(['Gender','Education Field','Attrition'])['Attrition'].count().to_frame('Count')
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
# Gender is a secondary sort key: px.bar hands out MF_Colors in order of first appearance,
# so a deterministic row order keeps the colour of each gender fixed.
Temp = Temp.reset_index().sort_values(by=['Education Field', 'Gender'])

# Figures: one panel per attrition status, sharing the x-axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
for row, status in enumerate(['No', 'Yes'], start=1):
    bars = px.bar(Temp.loc[Temp.Attrition == status], y= 'Education Field', x= 'Percentage', orientation='h',
                  color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                  color_discrete_sequence = MF_Colors)
    # Copy every gender trace that exists instead of indexing traces [0] and [1] by hand.
    for trace in bars.data:
        fig.add_trace(trace, row=row, col=1)
# Legend entries come from the top panel only; the bottom panel would just repeat them.
fig.update_traces(showlegend = False, row=2, col=1)
# '.2f' prints two decimals; the bare '.2' means two significant digits in d3-format (12.34 -> "12").
fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                  texttemplate='%{text:.2f}%', textposition='outside')

# Layout
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(title_text='Education Field', showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_xaxes(title_text='Percent', range=[0, 25], row=2, col=1)
fig.update_layout(height= 700, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Education Field Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
In [18]:
# Percentage of all employees per education level, education field and attrition status.
Temp = Data.groupby(['Education','Education Field','Attrition'])['Attrition'].count().to_frame('Count')
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index()
# Keep the numeric code for sorting; the 'Education' column itself becomes the text label.
Temp['Education Code'] = Temp['Education']
Temp['Education'] = Temp['Education'].replace(Categorical_Dict['Education'])
# Drop empty combinations; .copy() makes the result an independent frame before it is edited.
Temp = Temp[Temp.Percentage != 0].sort_values(by=['Education Code','Education Field']).copy()
# Put each word of the field name on its own line so the tick labels do not overlap.
Temp['Education Field'] = Temp['Education Field'].map(lambda x: x.replace('&','and').replace(' ','\n'))

# sns.set_palette() returns None, so it cannot serve as the `palette` argument. It is kept as a
# plain call because it also changes the session-wide default palette.
sns.set_palette(Ed_Colors)
# Fixed level order and an explicit label -> colour mapping, so both panels colour a given
# education level identically and the single legend (right panel) is valid for both.
Ed_Levels = list(Categorical_Dict['Education'].values())
Ed_Palette = dict(zip(Ed_Levels, Ed_Colors))
Field_Order = sorted(Temp['Education Field'].unique())

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 6),sharey = True)
for a, status in zip(ax, ['No', 'Yes']):
    _ = sns.barplot(ax= a, x='Education Field', y='Percentage', hue='Education', data=Temp[Temp.Attrition == status],
                    order=Field_Order, hue_order=Ed_Levels, palette=Ed_Palette, edgecolor= Ed_LC)
    _ = a.set_title('Attrition: ' + status, fontsize = 16)
    _ = a.tick_params(labelsize=14)
_ = ax[0].set_ylim([0,14])
_ = ax[0].legend().set_visible(False)
_ = ax[1].legend(bbox_to_anchor=(1, 0.55), loc='center left',fontsize = 14)
_ = ax[1].set_ylabel(None)
plt.subplots_adjust(wspace=0.05)

Environment Satisfaction

In [19]:
# Percentage of all employees per gender, environment-satisfaction level and attrition status.
Temp = Data.groupby(['Gender','Environment Satisfaction','Attrition'])['Attrition'].count().to_frame('Count')
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
# Sort on the numeric satisfaction code (not the label) so the levels keep their ordinal order
# Low < Medium < High < Very High; Gender as a secondary key fixes the order in which px.bar
# hands out MF_Colors.
Temp = Temp.reset_index().sort_values(by=['Environment Satisfaction', 'Gender'])
Temp['Environment Satisfaction'] = Temp['Environment Satisfaction'].replace(Categorical_Dict['Environment Satisfaction'])
ES_Levels = list(Categorical_Dict['Environment Satisfaction'].values())

# Figures: one panel per attrition status, sharing the x-axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
for row, status in enumerate(['No', 'Yes'], start=1):
    bars = px.bar(Temp.loc[Temp.Attrition == status], y= 'Environment Satisfaction', x= 'Percentage', orientation='h',
                  color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                  color_discrete_sequence = MF_Colors)
    # Copy every gender trace that exists instead of indexing traces [0] and [1] by hand.
    for trace in bars.data:
        fig.add_trace(trace, row=row, col=1)
# Legend entries come from the top panel only; the bottom panel would just repeat them.
fig.update_traces(showlegend = False, row=2, col=1)
# '.2f' prints two decimals; the bare '.2' means two significant digits in d3-format (12.34 -> "12").
fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                  texttemplate='%{text:.2f}%', textposition='outside')

# Layout
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Pin the category order on both panels so a level missing from one gender cannot reshuffle the axis.
fig.update_yaxes(title_text='Environment Satisfaction', showline=True, linewidth=1, linecolor='Lightgray',
                 mirror=True, categoryorder='array', categoryarray=ES_Levels)
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
fig.update_layout(height= 600, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Environment Satisfaction Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Hourly Rate

In [20]:
# Histogram of Hourly Rate coloured by attrition status, with a marginal box plot per status.
fig = px.histogram(Data, x = 'Hourly Rate', color='Attrition', marginal= 'box', color_discrete_sequence= Att_Colors,
                  hover_data=Data.columns)
fig.update_traces(marker_line_color= Att_LC, marker_line_width=0.5, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Fix the range of the main (histogram) y-axis only.
fig['layout']['yaxis'].update(range=[0, 140])
# The title is set once here; the earlier, unrelated 'Credit Distribution Histogram' title
# (a copy-paste leftover that was immediately overwritten) has been removed.
fig.update_layout(plot_bgcolor= 'white',
                  title={'text': 'Hourly Rate Distribution by Attrition',
                         'x':0.46, 'y':0.95,
                         'xanchor': 'center', 'yanchor': 'top'},
                  yaxis_title='Frequency')
fig.show()
In [21]:
# Percentage of all employees per gender, hourly-rate band and attrition status.
bins = pd.IntervalIndex.from_tuples([(25, 50), (50, 75), (75, 100)])
# .assign() returns a new frame, so the binning never writes into a slice of Data.
Temp = Data[['Gender','Hourly Rate','Attrition']].assign(
    **{'Hourly Rate': lambda df: pd.cut(df['Hourly Rate'], bins)})
# observed=False keeps empty (gender, band, attrition) combinations as explicit zero rows,
# whatever the default of the installed pandas version is.
Temp = Temp.groupby(['Gender','Hourly Rate','Attrition'], observed=False)['Attrition'].count().to_frame('Count')
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
# Gender is a secondary sort key: px.bar hands out MF_Colors in order of first appearance,
# so a deterministic row order keeps the colour of each gender fixed.
Temp = Temp.reset_index().sort_values(by=['Hourly Rate', 'Gender'])
Temp['Hourly Rate'] = Temp['Hourly Rate'].astype(str)

# Figures: one panel per attrition status, sharing the x-axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
for row, status in enumerate(['No', 'Yes'], start=1):
    bars = px.bar(Temp.loc[Temp.Attrition == status], y= 'Hourly Rate', x= 'Percentage', orientation='h',
                  color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                  color_discrete_sequence = MF_Colors)
    # Copy every gender trace that exists instead of indexing traces [0] and [1] by hand.
    for trace in bars.data:
        fig.add_trace(trace, row=row, col=1)
# Legend entries come from the top panel only; the bottom panel would just repeat them.
fig.update_traces(showlegend = False, row=2, col=1)
# '.2f' prints two decimals; the bare '.2' means two significant digits in d3-format (12.34 -> "12").
fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                  texttemplate='%{text:.2f}%', textposition='outside')

# Layout
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(title_text='Hourly Rate', showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
fig.update_layout(height= 500, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Hourly Rate Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
In [22]:
# Percentage of all employees per education level, hourly-rate band and attrition status.
bins = pd.IntervalIndex.from_tuples([(25, 50), (50, 75), (75, 100)])
# .assign() returns a new frame, so the binning never writes into a slice of Data.
Temp = Data[['Education','Hourly Rate','Attrition']].assign(
    **{'Hourly Rate': lambda df: pd.cut(df['Hourly Rate'], bins)})
# observed=False makes the handling of empty combinations independent of the pandas default.
Temp = Temp.groupby(['Education','Hourly Rate','Attrition'], observed=False)['Attrition'].count().to_frame('Count')
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index()
# Keep the numeric code for sorting; the 'Education' column itself becomes the text label.
Temp['Education Code'] = Temp['Education']
Temp['Education'] = Temp['Education'].replace(Categorical_Dict['Education'])
# Drop empty combinations; .copy() makes the result an independent frame before it is edited.
Temp = Temp[Temp.Percentage != 0].sort_values(by=['Education Code','Hourly Rate']).copy()
Temp['Hourly Rate'] = Temp['Hourly Rate'].astype(str)

# sns.set_palette() returns None, so it cannot serve as the `palette` argument. It is kept as a
# plain call because it also changes the session-wide default palette.
sns.set_palette(Ed_Colors)
# Fixed level order and an explicit label -> colour mapping, so both panels colour a given
# education level identically and the single legend (right panel) is valid for both.
Ed_Levels = list(Categorical_Dict['Education'].values())
Ed_Palette = dict(zip(Ed_Levels, Ed_Colors))
Rate_Order = [str(b) for b in bins]

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 6),sharey = True)
for a, status in zip(ax, ['No', 'Yes']):
    _ = sns.barplot(ax= a, x='Hourly Rate', y='Percentage', hue='Education', data=Temp[Temp.Attrition == status],
                    order=Rate_Order, hue_order=Ed_Levels, palette=Ed_Palette, edgecolor= Ed_LC)
    _ = a.set_title('Attrition: ' + status, fontsize = 16)
    _ = a.tick_params(labelsize=14)
_ = ax[0].set_ylim([0,14])
_ = ax[0].legend().set_visible(False)
_ = ax[1].legend(bbox_to_anchor=(1, 0.55), loc='center left',fontsize = 14)
_ = ax[1].set_ylabel(None)
plt.subplots_adjust(wspace=0.05)

Job Involvement

In [23]:
# Share of all employees per (Gender, Job Involvement, Attrition) group,
# drawn as two stacked horizontal bar panels (Attrition: No on top, Yes below).
Temp = Data[['Gender','Job Involvement','Attrition']]
Temp = Temp.groupby(['Gender','Job Involvement','Attrition'])['Attrition'].agg(['count']).rename(columns = {'count':'Count'})
# Percentage is relative to the total head count, not to each group.
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)

# Sort on the numeric level first and only then swap in the labels; sorting the
# labels would order them alphabetically (High, Low, Medium, Very High).
Temp = Temp.reset_index(drop = False).sort_values(by=['Job Involvement'])
Temp['Job Involvement'] = Temp['Job Involvement'].replace(Categorical_Dict['Job Involvement'])

# Figures

fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
# The bottom panel hides its legend entries so each gender is listed once.
for panel_row, outcome, extra in [(1, 'No', {}), (2, 'Yes', {'showlegend': False})]:
    panel = px.bar(Temp.loc[Temp.Attrition == outcome], y= 'Job Involvement', x= 'Percentage', orientation='h',
                   color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                   color_discrete_sequence = MF_Colors)
    # px.bar produces one trace per Gender value; copy all of them into the subplot.
    for trace in panel.data:
        fig.add_trace(trace, row=panel_row, col=1)
    fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                      row=panel_row, col=1, **extra)

# Update

fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 title_text='Job Involvement')
fig.update_layout(height= 600, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Job Involvement Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Job Level

In [24]:
# Share of all employees per (Gender, Job Level, Attrition) group,
# drawn as two stacked horizontal bar panels (Attrition: No on top, Yes below).
Temp = (Data[['Gender','Job Level','Attrition']]
        .groupby(['Gender','Job Level','Attrition'])['Attrition']
        .agg({'count'})
        .rename(columns = {'count':'Count'}))
# Percentage is relative to the total head count, not to each group.
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)
Temp = Temp.reset_index(drop = False).sort_values(by=['Job Level'])

# Figures

fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
# The bottom panel hides its legend entries so each gender is listed once.
for panel_row, outcome, extra in [(1, 'No', {}), (2, 'Yes', {'showlegend': False})]:
    panel = px.bar(Temp.loc[Temp.Attrition == outcome], y= 'Job Level', x= 'Percentage', orientation='h',
                   color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                   color_discrete_sequence = MF_Colors)
    # px.bar splits the bars by Gender; the first two traces are copied over.
    for k in (0, 1):
        fig.add_trace(panel['data'][k], row=panel_row, col=1)
    fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                      row=panel_row, col=1, **extra)

# Update

fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 title_text='Job Level')
fig.update_layout(height= 700, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Job Level Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
In [25]:
# Share of all employees per (Education, Job Level, Attrition) group,
# drawn as two side-by-side bar charts (Attrition: No / Yes) coloured by Education.
Temp = Data[['Education','Job Level','Attrition']]
Temp = Temp.groupby(['Education','Job Level','Attrition'])['Attrition'].agg(['count']).rename(columns = {'count':'Count'})
# Percentage is relative to the total head count, not to each group.
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)
Temp = Temp.reset_index(drop = False)
# Keep the numeric education level so rows can be ordered by level instead of by label.
Temp['Education Code'] = Temp['Education']
Temp['Education'] = Temp['Education'].replace(Categorical_Dict['Education'])
# Filter and sort in one expression so nothing is modified in place on a filtered slice.
Temp = Temp[Temp.Percentage != 0].sort_values(by=['Education Code','Job Level'])

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 6),sharey = True)
# sns.set_palette() returns None, so it must not be used as the `palette=` value;
# set the default palette once and hand the colours to barplot explicitly.
sns.set_palette(Ed_Colors)
for a, outcome in zip(ax, ['No', 'Yes']):
    _ = sns.barplot(ax= a, x='Job Level', y='Percentage', hue='Education',
                    data=Temp[Temp.Attrition == outcome],
                    palette=Ed_Colors, edgecolor= Ed_LC)
    _ = a.set_title('Attrition: ' + outcome, fontsize = 16)
    _ = a.tick_params(labelsize=14)
_ = ax[0].set_ylim([0,12])
# Only the right-hand panel shows the legend.
_ = ax[0].legend().set_visible(False)
_ = ax[1].legend(bbox_to_anchor=(1, 0.55), loc='center left',fontsize = 14)
_ = ax[1].set_ylabel(None)
plt.subplots_adjust(wspace=0.05)

Job Roles

In [26]:
# Share of all employees per (Gender, Job Role, Attrition) group,
# drawn as two stacked horizontal bar panels (Attrition: No on top, Yes below).
Temp = (Data[['Gender','Job Role','Attrition']]
        .groupby(['Gender','Job Role','Attrition'])['Attrition']
        .agg({'count'})
        .rename(columns = {'count':'Count'}))
# Percentage is relative to the total head count, not to each group.
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)
Temp = Temp.reset_index(drop = False).sort_values(by=['Job Role'])

# Figures

fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
# The bottom panel hides its legend entries so each gender is listed once.
for panel_row, outcome, extra in [(1, 'No', {}), (2, 'Yes', {'showlegend': False})]:
    panel = px.bar(Temp.loc[Temp.Attrition == outcome], y= 'Job Role', x= 'Percentage', orientation='h',
                   color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                   color_discrete_sequence = MF_Colors)
    # px.bar splits the bars by Gender; the first two traces are copied over.
    for k in (0, 1):
        fig.add_trace(panel['data'][k], row=panel_row, col=1)
    fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                      row=panel_row, col=1, **extra)

# Update

fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(title_text='Percent', range=[0, 12], row=2, col=1)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 title_text='Job Role')
fig.update_layout(height= 1000, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Job Role Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Job Satisfaction

In [27]:
# Share of all employees per (Gender, Job Satisfaction, Attrition) group,
# drawn as two stacked horizontal bar panels (Attrition: No on top, Yes below).
Temp = Data[['Gender','Job Satisfaction','Attrition']]
Temp = Temp.groupby(['Gender','Job Satisfaction','Attrition'])['Attrition'].agg(['count']).rename(columns = {'count':'Count'})
# Percentage is relative to the total head count, not to each group.
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)

# Sort on the numeric level first and only then swap in the labels; sorting the
# labels would order them alphabetically (High, Low, Medium, Very High).
Temp = Temp.reset_index(drop = False).sort_values(by=['Job Satisfaction'])
Temp['Job Satisfaction'] = Temp['Job Satisfaction'].replace(Categorical_Dict['Job Satisfaction'])

# Figures

fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
# The bottom panel hides its legend entries so each gender is listed once.
for panel_row, outcome, extra in [(1, 'No', {}), (2, 'Yes', {'showlegend': False})]:
    panel = px.bar(Temp.loc[Temp.Attrition == outcome], y= 'Job Satisfaction', x= 'Percentage', orientation='h',
                   color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                   color_discrete_sequence = MF_Colors)
    # px.bar produces one trace per Gender value; copy all of them into the subplot.
    for trace in panel.data:
        fig.add_trace(trace, row=panel_row, col=1)
    fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                      row=panel_row, col=1, **extra)

# Update

fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 title_text='Job Satisfaction')
fig.update_layout(height= 600, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Job Satisfaction Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
In [28]:
# Share of all employees per (Education, Job Satisfaction, Attrition) group,
# drawn as two side-by-side bar charts (Attrition: No / Yes) coloured by Education.
Temp = Data[['Education','Job Satisfaction','Attrition']]
Temp = Temp.groupby(['Education','Job Satisfaction','Attrition'])['Attrition'].agg(['count']).rename(columns = {'count':'Count'})
# Percentage is relative to the total head count, not to each group.
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)
Temp = Temp.reset_index(drop = False)
# Order by the numeric levels while both columns are still codes; sorting after
# relabelling would put the satisfaction labels in alphabetical order.
Temp = Temp[Temp.Percentage != 0].sort_values(by=['Education','Job Satisfaction'])
# assign() builds a new frame, so nothing is written into the filtered slice.
Temp = Temp.assign(**{
    'Education Code': Temp['Education'],
    'Education': Temp['Education'].replace(Categorical_Dict['Education']),
    'Job Satisfaction': Temp['Job Satisfaction'].replace(Categorical_Dict['Job Satisfaction']),
})
# Satisfaction labels from lowest to highest code, so both panels share one x order.
satisfaction_order = [Categorical_Dict['Job Satisfaction'][code]
                      for code in sorted(Categorical_Dict['Job Satisfaction'])]

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 6),sharey = True)
# sns.set_palette() returns None, so it must not be used as the `palette=` value;
# set the default palette once and hand the colours to barplot explicitly.
sns.set_palette(Ed_Colors)
for a, outcome in zip(ax, ['No', 'Yes']):
    _ = sns.barplot(ax= a, x='Job Satisfaction', y='Percentage', hue='Education',
                    data=Temp[Temp.Attrition == outcome], order=satisfaction_order,
                    palette=Ed_Colors, edgecolor= Ed_LC)
    _ = a.set_title('Attrition: ' + outcome, fontsize = 16)
    _ = a.tick_params(labelsize=14)
_ = ax[0].set_ylim([0,10])
# Only the right-hand panel shows the legend.
_ = ax[0].legend().set_visible(False)
_ = ax[1].legend(bbox_to_anchor=(1, 0.55), loc='center left',fontsize = 14)
_ = ax[1].set_ylabel(None)
plt.subplots_adjust(wspace=0.05)

Marital Status

In [29]:
# Share of all employees per (Gender, Marital Status, Attrition) group,
# drawn as two stacked horizontal bar panels (Attrition: No on top, Yes below).
Temp = (Data[['Gender','Marital Status','Attrition']]
        .groupby(['Gender','Marital Status','Attrition'])['Attrition']
        .agg({'count'})
        .rename(columns = {'count':'Count'}))
# Percentage is relative to the total head count, not to each group.
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)
Temp = Temp.reset_index(drop = False).sort_values(by=['Marital Status'])

# Figures

fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
# The bottom panel hides its legend entries so each gender is listed once.
for panel_row, outcome, extra in [(1, 'No', {}), (2, 'Yes', {'showlegend': False})]:
    panel = px.bar(Temp.loc[Temp.Attrition == outcome], y= 'Marital Status', x= 'Percentage', orientation='h',
                   color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                   color_discrete_sequence = MF_Colors)
    # px.bar splits the bars by Gender; the first two traces are copied over.
    for k in (0, 1):
        fig.add_trace(panel['data'][k], row=panel_row, col=1)
    fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                      row=panel_row, col=1, **extra)

# Update

fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(title_text='Percent', range=[0, 25], row=2, col=1)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 title_text='Marital Status')
fig.update_layout(height= 500, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Marital Status Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
In [30]:
# Share of all employees per (Education, Marital Status, Attrition) group,
# drawn as two side-by-side bar charts (Attrition: No / Yes) coloured by Education.
Temp = Data[['Education','Marital Status','Attrition']]
Temp = Temp.groupby(['Education','Marital Status','Attrition'])['Attrition'].agg(['count']).rename(columns = {'count':'Count'})
# Percentage is relative to the total head count, not to each group.
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)
Temp = Temp.reset_index(drop = False)
# Order by numeric education level, then marital status, before relabelling Education.
Temp = Temp[Temp.Percentage != 0].sort_values(by=['Education','Marital Status'])
# assign() builds a new frame, so nothing is written into the filtered slice.
Temp = Temp.assign(**{
    'Education Code': Temp['Education'],
    'Education': Temp['Education'].replace(Categorical_Dict['Education']),
})

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(13.5, 6),sharey = True)
# sns.set_palette() returns None, so it must not be used as the `palette=` value;
# set the default palette once and hand the colours to barplot explicitly.
sns.set_palette(Ed_Colors)
for a, outcome in zip(ax, ['No', 'Yes']):
    _ = sns.barplot(ax= a, x='Marital Status', y='Percentage', hue='Education',
                    data=Temp[Temp.Attrition == outcome],
                    palette=Ed_Colors, edgecolor= Ed_LC)
    _ = a.set_title('Attrition: ' + outcome, fontsize = 16)
    _ = a.tick_params(labelsize=14)
_ = ax[0].set_ylim([0,20])
# Only the right-hand panel shows the legend.
_ = ax[0].legend().set_visible(False)
_ = ax[1].legend(bbox_to_anchor=(1, 0.55), loc='center left',fontsize = 14)
_ = ax[1].set_ylabel(None)
plt.subplots_adjust(wspace=0.05)

Number of Companies Worked

In [31]:
# Share of all employees per (Gender, Number Of Companies Worked, Attrition) group,
# drawn as two stacked horizontal bar panels (Attrition: No on top, Yes below).
Temp = (Data[['Gender','Number Of Companies Worked','Attrition']]
        .groupby(['Gender','Number Of Companies Worked','Attrition'])['Attrition']
        .agg({'count'})
        .rename(columns = {'count':'Count'}))
# Percentage is relative to the total head count, not to each group.
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)
Temp = Temp.reset_index(drop = False).sort_values(by=['Number Of Companies Worked'])

# Figures

fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
# The bottom panel hides its legend entries so each gender is listed once.
for panel_row, outcome, extra in [(1, 'No', {}), (2, 'Yes', {'showlegend': False})]:
    panel = px.bar(Temp.loc[Temp.Attrition == outcome], y= 'Number Of Companies Worked', x= 'Percentage', orientation='h',
                   color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                   color_discrete_sequence = MF_Colors)
    # px.bar splits the bars by Gender; the first two traces are copied over.
    for k in (0, 1):
        fig.add_trace(panel['data'][k], row=panel_row, col=1)
    fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                      row=panel_row, col=1, **extra)

# Update

fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 title_text='Number Of Companies Worked')
fig.update_layout(height= 1000, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Number Of Companies Worked Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Over Time

In [32]:
# Share of all employees per (Gender, Over Time, Attrition) group,
# drawn as two stacked horizontal bar panels (Attrition: No on top, Yes below).
Temp = (Data[['Gender','Over Time','Attrition']]
        .groupby(['Gender','Over Time','Attrition'])['Attrition']
        .agg({'count'})
        .rename(columns = {'count':'Count'}))
# Percentage is relative to the total head count, not to each group.
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)
Temp = Temp.reset_index(drop = False).sort_values(by=['Over Time'])

# Figures

fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
# The bottom panel hides its legend entries so each gender is listed once.
for panel_row, outcome, extra in [(1, 'No', {}), (2, 'Yes', {'showlegend': False})]:
    panel = px.bar(Temp.loc[Temp.Attrition == outcome], y= 'Over Time', x= 'Percentage', orientation='h',
                   color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                   color_discrete_sequence = MF_Colors)
    # px.bar splits the bars by Gender; the first two traces are copied over.
    for k in (0, 1):
        fig.add_trace(panel['data'][k], row=panel_row, col=1)
    fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                      row=panel_row, col=1, **extra)

# Update

# Unlike the neighbouring charts, the value labels are drawn inside the bars here.
fig.update_traces(texttemplate='%{text:.2}%', textposition='inside')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(title_text='Percent', range=[0, 40], row=2, col=1)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 title_text='Over Time')
fig.update_layout(height= 450, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Over Time Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Percent Salary Hike

In [33]:
# Histogram of Percent Salary Hike, coloured by Attrition, with a box-plot marginal.
fig = px.histogram(Data, x = 'Percent Salary Hike', color='Attrition', marginal= 'box',
                   color_discrete_sequence= Att_Colors, hover_data=Data.columns)
fig.update_traces(marker_line_color= Att_LC, marker_line_width=0.5, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Pin the range of layout.yaxis only (presumably the histogram's count axis, not the marginal's).
fig['layout']['yaxis'].update(range=[0, 250])
# The title is set once here; the earlier placeholder title from another
# analysis ('Credit Distribution Histogram') was always overwritten and is dropped.
fig.update_layout(plot_bgcolor= 'white',
                  title={'text': 'Percent Salary Hike Distribution by Attrition',
                         'x':0.46, 'y':0.95,
                         'xanchor': 'center', 'yanchor': 'top'},
                  yaxis_title='Frequency')
fig.show()
In [34]:
# Share of all employees per (Gender, Percent Salary Hike band, Attrition) group,
# drawn as two stacked horizontal bar panels (Attrition: No on top, Yes below).
bins = pd.IntervalIndex.from_tuples([(10, 15), (15, 20), (20, 25)])
# Explicit copy: the binned column is written into Temp, never into a slice of Data.
Temp = Data[['Gender','Percent Salary Hike','Attrition']].copy()
Temp['Percent Salary Hike'] = pd.cut(Temp['Percent Salary Hike'], bins)
Temp = Temp.groupby(['Gender','Percent Salary Hike','Attrition'])['Attrition'].agg(['count']).rename(columns = {'count':'Count'})
# Percentage is relative to the total head count, not to each group.
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)

# Sort while the band is still an interval, then turn it into text for plotting.
Temp = Temp.reset_index(drop = False).sort_values(by=['Percent Salary Hike'])
Temp['Percent Salary Hike'] = Temp['Percent Salary Hike'].astype(str)

# Figures

fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
# The bottom panel hides its legend entries so each gender is listed once.
for panel_row, outcome, extra in [(1, 'No', {}), (2, 'Yes', {'showlegend': False})]:
    panel = px.bar(Temp.loc[Temp.Attrition == outcome], y= 'Percent Salary Hike', x= 'Percentage', orientation='h',
                   color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                   color_discrete_sequence = MF_Colors)
    # px.bar produces one trace per Gender value; copy all of them into the subplot.
    for trace in panel.data:
        fig.add_trace(trace, row=panel_row, col=1)
    fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                      row=panel_row, col=1, **extra)

# Update

fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(title_text='Percent', range=[0, 35], row=2, col=1)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 title_text='Percent Salary Hike')
fig.update_layout(height= 500, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Percent Salary Hike Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Performance Rating

In [35]:
# Share of all employees per (Gender, Performance Rating, Attrition) group,
# drawn as two stacked horizontal bar panels (Attrition: No on top, Yes below).
Temp = Data[['Gender','Performance Rating','Attrition']]
Temp = Temp.groupby(['Gender','Performance Rating','Attrition'])['Attrition'].agg(['count']).rename(columns = {'count':'Count'})
# Percentage is relative to the total head count, not to each group.
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)

# Sort on the numeric rating first and only then swap in the labels, so the
# order follows the rating scale rather than the alphabet.
Temp = Temp.reset_index(drop = False).sort_values(by=['Performance Rating'])
Temp['Performance Rating'] = Temp['Performance Rating'].replace(Categorical_Dict['Performance Rating'])

# Figures

fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
# The bottom panel hides its legend entries so each gender is listed once.
for panel_row, outcome, extra in [(1, 'No', {}), (2, 'Yes', {'showlegend': False})]:
    panel = px.bar(Temp.loc[Temp.Attrition == outcome], y= 'Performance Rating', x= 'Percentage', orientation='h',
                   color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                   color_discrete_sequence = MF_Colors)
    # px.bar produces one trace per Gender value; copy all of them into the subplot.
    for trace in panel.data:
        fig.add_trace(trace, row=panel_row, col=1)
    fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                      row=panel_row, col=1, **extra)

# Update

fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(title_text='Percent', range=[0, 45], row=2, col=1)
# The axis title is broken over two lines with <br>.
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 title_text='Performance<br>Rating')
fig.update_layout(height= 420, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Performance Rating Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Relationship Satisfaction

In [36]:
# Share of all employees per (Gender, Relationship Satisfaction, Attrition) group,
# drawn as two stacked horizontal bar panels (Attrition: No on top, Yes below).
Temp = Data[['Gender','Relationship Satisfaction','Attrition']]
Temp = Temp.groupby(['Gender','Relationship Satisfaction','Attrition'])['Attrition'].agg(['count']).rename(columns = {'count':'Count'})
# Percentage is relative to the total head count, not to each group.
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)

# Sort on the numeric level first and only then swap in the labels; sorting the
# labels would order them alphabetically (High, Low, Medium, Very High).
Temp = Temp.reset_index(drop = False).sort_values(by=['Relationship Satisfaction'])
Temp['Relationship Satisfaction'] = Temp['Relationship Satisfaction'].replace(Categorical_Dict['Relationship Satisfaction'])

# Figures

fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
# The bottom panel hides its legend entries so each gender is listed once.
for panel_row, outcome, extra in [(1, 'No', {}), (2, 'Yes', {'showlegend': False})]:
    panel = px.bar(Temp.loc[Temp.Attrition == outcome], y= 'Relationship Satisfaction', x= 'Percentage', orientation='h',
                   color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                   color_discrete_sequence = MF_Colors)
    # px.bar produces one trace per Gender value; copy all of them into the subplot.
    for trace in panel.data:
        fig.add_trace(trace, row=panel_row, col=1)
    fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                      row=panel_row, col=1, **extra)

# Update

fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 title_text='Relationship Satisfaction')
fig.update_layout(height= 600, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Relationship Satisfaction Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Stock Option Level

In [37]:
# Share of all employees per (Gender, Stock Option Level, Attrition) group,
# drawn as two stacked horizontal bar panels (Attrition: No on top, Yes below).
Temp = (Data[['Gender','Stock Option Level','Attrition']]
        .groupby(['Gender','Stock Option Level','Attrition'])['Attrition']
        .agg({'count'})
        .rename(columns = {'count':'Count'}))
# Percentage is relative to the total head count, not to each group.
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)
Temp = Temp.reset_index(drop = False).sort_values(by=['Stock Option Level'])

# Figures

fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
# The bottom panel hides its legend entries so each gender is listed once.
for panel_row, outcome, extra in [(1, 'No', {}), (2, 'Yes', {'showlegend': False})]:
    panel = px.bar(Temp.loc[Temp.Attrition == outcome], y= 'Stock Option Level', x= 'Percentage', orientation='h',
                   color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
                   color_discrete_sequence = MF_Colors)
    # px.bar splits the bars by Gender; the first two traces are copied over.
    for k in (0, 1):
        fig.add_trace(panel['data'][k], row=panel_row, col=1)
    fig.update_traces(marker_line_color= MF_LC, marker_line_width=1, opacity=1,
                      row=panel_row, col=1, **extra)

# Update

fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(title_text='Percent', range=[0, 25], row=2, col=1)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 title_text='Stock Option Level')
fig.update_layout(height= 680, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Stock Option Level Distribution by Gender and Attrition',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Total Working Years

In [38]:
# Histogram of Total Working Years, coloured by Attrition, with a box-plot marginal.
fig = px.histogram(Data, x = 'Total Working Years', color='Attrition', marginal= 'box',
                   color_discrete_sequence= Att_Colors, hover_data=Data.columns)
fig.update_traces(marker_line_color= Att_LC, marker_line_width=0.5, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Pin the range of layout.yaxis only (presumably the histogram's count axis, not the marginal's).
fig['layout']['yaxis'].update(range=[0, 250])
# The title is set once here; the earlier placeholder title from another
# analysis ('Credit Distribution Histogram') was always overwritten and is dropped.
fig.update_layout(plot_bgcolor= 'white',
                  title={'text': 'Total Working Years Distribution by Attrition',
                         'x':0.46, 'y':0.95,
                         'xanchor': 'center', 'yanchor': 'top'},
                  yaxis_title='Frequency')
fig.show()
In [39]:
# Bucket 'Total Working Years' into four intervals and compute, for every
# (Gender, bucket, Attrition) combination, its share of all counted rows.
bins = pd.IntervalIndex.from_tuples([(-1, 10), (10, 20), (20, 30), (30, 40)])
# .copy() yields an independent frame, so the column assignment below is not
# performed on a slice of Data (avoids the SettingWithCopy pattern).
Temp = Data[['Gender', 'Total Working Years', 'Attrition']].copy()
Temp['Total Working Years'] = pd.cut(Temp['Total Working Years'], bins)
# pd.cut produces a categorical column; observed=False is spelled out so the
# grouping does not depend on the pandas version's default for categoricals.
Temp = (Temp.groupby(['Gender', 'Total Working Years', 'Attrition'], observed=False)['Attrition']
            .agg(['count'])
            .rename(columns={'count': 'Count'}))
# Percentage of the grand total, rounded to two decimals.
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp.reset_index(drop=False, inplace=True)
# Sort while the buckets are still intervals, then convert them to strings for plotting.
Temp = Temp.sort_values(by=['Total Working Years'])
Temp['Total Working Years'] = Temp['Total Working Years'].astype(str)

# Figures

fig = make_subplots(rows=2, cols=1, vertical_spacing=0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
# One horizontal bar panel per attrition outcome: row 1 = 'No', row 2 = 'Yes'.
# MF_Colors and MF_LC are defined outside this cell.
for row, outcome in enumerate(('No', 'Yes'), start=1):
    panel = px.bar(Temp.loc[Temp.Attrition == outcome], y='Total Working Years', x='Percentage',
                   orientation='h', color='Gender', text='Percentage', hover_data=Temp.columns,
                   color_discrete_sequence=MF_Colors)
    for trace in panel['data']:
        fig.add_trace(trace, row=row, col=1)
    fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=row, col=1)
# The bottom panel repeats the Gender traces, so its legend entries are hidden.
fig.update_traces(showlegend=False, row=2, col=1)

# Update

fig.update_layout(height=600, plot_bgcolor='white', legend_orientation='h')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
# Print each bar's percentage next to it.
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 35], row=2, col=1)
for row in (1, 2):
    fig.update_yaxes(title_text='Total Working Years', row=row, col=1)
fig.update_layout(title={'text': 'Total Working Years Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Training Times Last Year

In [40]:
# Histogram of 'Training Times Last Year' coloured by Attrition, with a marginal box plot.
# This cell sits under the "Training Times Last Year" heading, so it plots that
# column rather than repeating the 'Total Working Years' figure.
# NOTE(review): the column name follows the spaced naming used by the other
# columns in this notebook -- confirm it matches Data.columns.
# Data, Att_Colors and Att_LC are defined outside this cell.
fig = px.histogram(Data, x='Training Times Last Year', color='Attrition', marginal='box',
                   color_discrete_sequence=Att_Colors, hover_data=Data.columns)
# Outline the markers of every trace.
fig.update_traces(marker_line_color=Att_LC, marker_line_width=0.5, opacity=1)
# Light-gray frame around every axis; grid lines on the y-axes.
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# No fixed y-range here: the [0, 250] limit used for 'Total Working Years' was
# chosen for that column, so the count axis is left to Plotly's autorange.
fig.update_layout(plot_bgcolor='white',
                  yaxis_title='Frequency',
                  title={'text': 'Training Times Last Year Distribution by Attrition',
                         'x': 0.46, 'y': 0.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Work-Life Balance Score

In [41]:
# Share of all counted rows for every (Gender, Work Life Balance, Attrition) combination.
# .copy() yields an independent frame, so the column assignment below is not
# performed on a slice of Data (avoids the SettingWithCopy pattern).
Temp = Data[['Gender', 'Work Life Balance', 'Attrition']].copy()
# Replace the values using the mapping stored in Categorical_Dict (defined outside this cell).
Temp['Work Life Balance'] = Temp['Work Life Balance'].replace(Categorical_Dict['Work Life Balance'])
Temp = (Temp.groupby(['Gender', 'Work Life Balance', 'Attrition'])['Attrition']
            .agg(['count'])
            .rename(columns={'count': 'Count'}))
# Percentage of the grand total, rounded to two decimals.
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp.reset_index(drop=False, inplace=True)

# Figures

fig = make_subplots(rows=2, cols=1, vertical_spacing=0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
# One horizontal bar panel per attrition outcome: row 1 = 'No', row 2 = 'Yes'.
# MF_Colors and MF_LC are defined outside this cell.
for row, outcome in enumerate(('No', 'Yes'), start=1):
    panel = px.bar(Temp.loc[Temp.Attrition == outcome], y='Work Life Balance', x='Percentage',
                   orientation='h', color='Gender', text='Percentage', hover_data=Temp.columns,
                   color_discrete_sequence=MF_Colors)
    for trace in panel['data']:
        fig.add_trace(trace, row=row, col=1)
    fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=row, col=1)
# The bottom panel repeats the Gender traces, so its legend entries are hidden.
fig.update_traces(showlegend=False, row=2, col=1)

# Update

fig.update_layout(height=600, plot_bgcolor='white', legend_orientation='h')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
# Print each bar's percentage next to it.
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
for row in (1, 2):
    fig.update_yaxes(title_text='Work Life Balance', row=row, col=1)
fig.update_layout(title={'text': 'Work Life Balance Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Years at the Company

In [42]:
# Histogram of 'Years At Company' coloured by Attrition, with a marginal box plot.
# Data, Att_Colors and Att_LC are defined outside this cell.
fig = px.histogram(Data, x='Years At Company', color='Attrition', marginal='box',
                   color_discrete_sequence=Att_Colors, hover_data=Data.columns)
# Outline the markers of every trace.
fig.update_traces(marker_line_color=Att_LC, marker_line_width=0.5, opacity=1)
# Light-gray frame around every axis; grid lines on the y-axes.
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Pin the range of the primary y-axis (layout.yaxis).
fig['layout']['yaxis'].update(range=[0, 250])
# Single layout update: background, y-axis label and the final title
# (no placeholder title is set beforehand).
fig.update_layout(plot_bgcolor='white',
                  yaxis_title='Frequency',
                  title={'text': 'Years At Company Distribution by Attrition',
                         'x': 0.46, 'y': 0.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Years In Current Role

In [43]:
# Histogram of 'Years In Current Role' coloured by Attrition, with a marginal box plot.
# Data, Att_Colors and Att_LC are defined outside this cell.
fig = px.histogram(Data, x='Years In Current Role', color='Attrition', marginal='box',
                   color_discrete_sequence=Att_Colors, hover_data=Data.columns)
# Outline the markers of every trace.
fig.update_traces(marker_line_color=Att_LC, marker_line_width=0.5, opacity=1)
# Light-gray frame around every axis; grid lines on the y-axes.
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Pin the range of the primary y-axis (layout.yaxis).
fig['layout']['yaxis'].update(range=[0, 400])
# Single layout update: background, y-axis label and the final title
# (no placeholder title is set beforehand).
fig.update_layout(plot_bgcolor='white',
                  yaxis_title='Frequency',
                  title={'text': 'Years In Current Role Distribution by Attrition',
                         'x': 0.46, 'y': 0.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Years Since Last Promotion

In [44]:
# Histogram of 'Years Since Last Promotion' coloured by Attrition, with a marginal box plot.
# Data, Att_Colors and Att_LC are defined outside this cell.
fig = px.histogram(Data, x='Years Since Last Promotion', color='Attrition', marginal='box',
                   color_discrete_sequence=Att_Colors, hover_data=Data.columns)
# Outline the markers of every trace.
fig.update_traces(marker_line_color=Att_LC, marker_line_width=0.5, opacity=1)
# Light-gray frame around every axis; grid lines on the y-axes.
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Pin the range of the primary y-axis (layout.yaxis).
fig['layout']['yaxis'].update(range=[0, 600])
# Single layout update: background, y-axis label and the final title
# (no placeholder title is set beforehand).
fig.update_layout(plot_bgcolor='white',
                  yaxis_title='Frequency',
                  title={'text': 'Years Since Last Promotion Distribution by Attrition',
                         'x': 0.46, 'y': 0.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

Years With Current Manager

In [45]:
# Histogram of 'Years With Current Manager' coloured by Attrition, with a marginal box plot.
# Data, Att_Colors and Att_LC are defined outside this cell.
fig = px.histogram(Data, x='Years With Current Manager', color='Attrition', marginal='box',
                   color_discrete_sequence=Att_Colors, hover_data=Data.columns)
# Outline the markers of every trace.
fig.update_traces(marker_line_color=Att_LC, marker_line_width=0.5, opacity=1)
# Light-gray frame around every axis; grid lines on the y-axes.
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Pin the range of the primary y-axis (layout.yaxis).
fig['layout']['yaxis'].update(range=[0, 350])
# Single layout update: background, y-axis label and the final title
# (no placeholder title is set beforehand).
fig.update_layout(plot_bgcolor='white',
                  yaxis_title='Frequency',
                  title={'text': 'Years With Current Manager Distribution by Attrition',
                         'x': 0.46, 'y': 0.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()